##########
# Crossing Borders: Econ analysis
#########

rm(list = ls())

library(lfe)
library(texreg)
library(here)
library(data.table)
library(qs)
library(tidyverse)
library(patchwork)
library(cowplot)

source(here("code", "functions.R"))


# Load data ---------------------------------------------------------------
#Data from Beerli et al. Note since this data covers an earlier period than most of the analyses in the paper. This data uses earlier municipality data
sess_all <- readstata13::read.dta13(here("data", "sess_merged.dta"))


#Defined treatment variables. Note since this data covers an earlier period than most of the analyses in the paper. This data uses earlier municipality data
sess_all$transBorder15 <- 0
sess_all$transBorder15[which(sess_all$bin15 == 1 & sess_all$year %in% (2000:2003))] <- 1
sess_all$transBorder15 %>% table

sess_all$freeBorder15 <- 0
sess_all$freeBorder15[which(sess_all$bin15 == 1 & sess_all$year > 2003)] <- 1
sess_all$freeBorder15 %>% table

sess_all$border15 <- 0
sess_all$border15[which(sess_all$bin15 == 1)] <- 1

sess_all$border30 <- 0
sess_all$border30[which(sess_all$bin15 == 2)] <- 1

for(y in seq(1994, 2010, by = 2)){
  sess_all[,paste0("border15_", y)] <- as.integer(sess_all$year == y) * sess_all$border15
}


#Filter to Swiss workers
sess_ch <- sess_all %>% filter(nat4 == "ch")

sess_imm <- sess_all %>% filter(nat4 == "ie")


# Main models -----------------------------------------------------
#All hourly wages
mod_wage <- felm(LNwagerhrl ~ transBorder15 + freeBorder15 | bfsnr + year | 0 | bfsnr, data = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all"))

#All Workers
mod_worker <- felm(LNworker ~ transBorder15 + freeBorder15 | bfsnr + year | 0 | bfsnr, data = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all"))

#Event study wages
mod_wage_es <- felm(LNwagerhrl ~ border15_1994 + border15_1996 + border15_2000 + border15_2002 + border15_2004 + border15_2006 + 
                      border15_2008 + border15_2010 | bfsnr + year | 0 | bfsnr, data = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all"))

#Event study FTE
mod_worker_es <- felm(LNworker ~ border15_1994 + border15_1996 + border15_2000 + border15_2002 + border15_2004 + border15_2006 + 
                        border15_2008 + border15_2010 | bfsnr + year | 0 | bfsnr, data = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all"))


# Models with weights -----------------------------------------------------
#All hourly wages
mod_wage_weights <- felm(LNwagerhrl ~ transBorder15 + freeBorder15 | bfsnr + year | 0 | bfsnr,
                         weights = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all", complete.cases(worker)) %>% pull(worker),
                         data = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all", complete.cases(worker)))

#All Workers
mod_worker_weights <- felm(LNworker ~ transBorder15 + freeBorder15 | bfsnr + year | 0 | bfsnr, 
                   weights = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all", complete.cases(worker)) %>% pull(worker),
                   data = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all", complete.cases(worker)))


#Event study wages
mod_wage_es_weights <- felm(LNwagerhrl ~ border15_1994 + border15_1996 + border15_2000 + border15_2002 + border15_2004 + border15_2006 + 
                      border15_2008 + border15_2010 | bfsnr + year | 0 | bfsnr, 
                    weights = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all", complete.cases(worker)) %>% pull(worker),
                    data = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all", complete.cases(worker)))

#Event study FTE
mod_worker_es_weights <- felm(LNworker ~ border15_1994 + border15_1996 + border15_2000 + border15_2002 + border15_2004 + border15_2006 + 
                        border15_2008 + border15_2010 | bfsnr + year | 0 | bfsnr, 
                      weights = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all", complete.cases(worker)) %>% pull(worker),
                      data = sess_ch %>% filter(distmin <= 30, BR == 1, edu2 == "0all", complete.cases(worker)))







# Wage Plots --------------------------------------------------------------
sess_ch$TR_three <- NULL
sess_ch$TR_three[sess_ch$border15 == 1] <- "0-15 Minutes"
sess_ch$TR_three[sess_ch$border30 == 1] <- "15-30 Minutes"

## Time trends
wage_raw <- sess_ch %>% filter(complete.cases(LNwagerhrl), complete.cases(year), complete.cases(TR_three)) %>% filter(distmin <= 30, edu2 == "0all") %>% 
  ggplot(aes(x = year, y = LNwagerhrl, color = TR_three, fill = TR_three, linetype = TR_three, shape = TR_three)) + 
  stat_summary(fun.y = mean, geom = "point") + 
  geom_smooth(method = "loess") + 
  geom_vline(xintercept = c(1999, 2004), linetype = "dashed", color = "black") +
  ylab("Real Hourly Wage (Mean, Log)") +
  xlab("Year") +
  theme_minimal() + 
  scale_x_continuous(breaks = seq(1991, 2019, 2), expand = expansion(mult = 0, add = 0.5)) +
  scale_color_manual(values = cbPalette[-3], name = NULL, guide = "legend") +
  scale_fill_manual(values = cbPalette[-3], name = NULL, guide = "legend") +
  scale_linetype_manual(values = c("solid", "dashed", "dotted"), name = NULL, guide = "legend") +
  scale_shape_manual(values = c(16, 17, 18), name = NULL, guide = "legend") +
  scale_y_continuous(expand = expand_scale(mult = 0, add = 0.025)) +
  annotate("text", x = 1996, y = 3.56, label = "Pre-Reform", hjust = 0.5) +
  annotate("text", x = 2001.5, y = 3.56, label = "Transition", hjust = 0.5) +
  annotate("text", x = 2007, y = 3.56, label = "Free Movement", hjust = 0.5) +
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        legend.margin = margin(4,4,4,4,unit = "pt"),
        axis.title.y = element_text(vjust = 0)) +
  ggtitle("A. Real Hourly Wage in the Border Region")


## Event study
event_study_dat_wage <- data.frame(
  y = coef(mod_wage_es),
  ymin = coef(mod_wage_es) - qt(.975, df = df.residual(mod_wage_es))*sqrt(diag(vcov(mod_wage_es))),
  ymax = coef(mod_wage_es) + qt(.975, df = df.residual(mod_wage_es))*sqrt(diag(vcov(mod_wage_es))),
  Year = as.numeric(gsub("border[[:digit:]][[:digit:]]_", "", names(coefficients(mod_wage_es))))
)

event_study_dat_wage <- rbind(setDT(event_study_dat_wage), data.table(c(0), c(0), c(0), c(1998)), use.names = F)

wage_event <- ggplot(data = event_study_dat_wage, aes(x = Year, y = y, ymin = ymin, ymax = ymax, 
                                                      color = factor(ifelse(Year < 2000, 3, ifelse(Year < 2004, 2, 1))),
                                                      shape = factor(ifelse(Year <= 1999, 3, ifelse(Year < 2004, 2, 1)))
                                                      )) + 
  geom_vline(xintercept = c(1999, 2004), linetype = "dashed", color = "black") +
  geom_hline(yintercept = 0, linetype = 2, color = "black") +
  geom_pointrange() +
  annotate("text", x = 1996, y = 0.1, label = "Pre-Reform", hjust = 0.5) +
  annotate("text", x = 2001.5, y = 0.1, label = "Transition", hjust = 0.5) +
  annotate("text", x = 2007, y = 0.1, label = "Free Movement", hjust = 0.5) +
  scale_color_manual(values = cbPalette[c(1,2,8)], name = NULL, labels = c("Free Movement", "Transition", "Pre-Reform"), guide = guide_legend(reverse = T)) +
  scale_shape_manual(values = c(16, 17, 18), 
                     name = NULL, 
                     labels = c("Free Movement", "Transition", "Pre-Reform"), 
                     guide = guide_legend(reverse = T)) +
  ylab("Real Hourly Wages (Mean, Log)") +
  theme_minimal() + 
  scale_x_continuous(breaks = seq(1994, 2010,2)) +
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        legend.margin = margin(4,4,4,4,unit = "pt")) +
  ggtitle("B. Effect of Border Proximity on Real Hourly Wages")


## DiD
wage_did <- ggplot(coef.gg.prep(list(mod_wage)), aes(x = modelcoef, y = -1*y, xmin = ylo95, xmax = yhi95, color = variable, shape = variable, group = variable)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  ggstance::geom_linerangeh(position = ggstance::position_dodgev(.25)) +
  ggstance::geom_pointrangeh(aes(xmin = ylo90, xmax = yhi90), size = 1.25, position = ggstance::position_dodgev(.25)) +
  ylab(NULL) +
  scale_y_continuous(breaks = c(-2, -1), labels = c("Transition\nPeriod", "Free\nMovement"), expand = expand_scale(mult = 0, add = 0.5)) +
  scale_color_manual(name = NULL, guide = "none", values = cbPalette[-3]) +
  scale_shape_discrete(name = NULL, guide = "none") +
  xlab("Effect of Border Proximity on Real Wages") +
  ggtitle("C. Difference-in-Differences Estimates") +
  theme_bw() + 
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        legend.margin = margin(4,4,4,4,unit = "pt"))


## Combined plot export
plots <- align_patches(wage_raw, wage_did)
top_row <- plot_grid(plots[[1]], wage_event, align = "h")
pdf(here("figures", "Fig5.pdf"), height = 8.27, width = 11.69)
plot_grid(top_row, plots[[2]], ncol = 1, rel_heights = c(3, 1.5))
dev.off()


# Worker Plots --------------------------------------------------------------

## Time trends
workers_raw <- sess_ch %>% filter(complete.cases(LNworker), complete.cases(year), complete.cases(TR_three)) %>% filter(distmin <= 30) %>% 
  ggplot(aes(x = year, y = LNworker, color = TR_three, fill = TR_three, linetype = TR_three, shape = TR_three)) + 
  stat_summary(fun.y = mean, geom = "point") + 
  geom_smooth(method = "loess") + 
  geom_vline(xintercept = c(1999, 2004), linetype = "dashed", color = "black") +
  ylab("Total Workers (Log)") +
  xlab("Year") +
  theme_minimal() + 
  scale_x_continuous(breaks = seq(1991, 2019, 2), expand = expansion(mult = 0, add = 0.5)) +
  scale_color_manual(values = cbPalette[-3], name = NULL, guide = "legend") +
  scale_fill_manual(values = cbPalette[-3], name = NULL, guide = "legend") +
  scale_linetype_manual(values = c("solid", "dashed", "dotted"), name = NULL, guide = "legend") +
  scale_shape_manual(values = c(16, 17, 18), name = NULL, guide = "legend") +
  scale_y_continuous(expand = expand_scale(mult = 0, add = 0.025)) +
  annotate("text", x = 1996, y = 5, label = "Pre-Reform", hjust = 0.5) +
  annotate("text", x = 2001.5, y = 5, label = "Transition", hjust = 0.5) +
  annotate("text", x = 2007, y = 5, label = "Free Movement", hjust = 0.5) +
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        legend.margin = margin(4,4,4,4,unit = "pt"),
        axis.title.y = element_text(vjust = 0)) +
  ggtitle("A. Employment in the Border Region")

## Event study
event_study_dat_worker <- data.frame(
  y = coef(mod_worker_es),
  ymin = coef(mod_worker_es) - qt(.975, df = df.residual(mod_worker_es))*sqrt(diag(vcov(mod_worker_es))),
  ymax = coef(mod_worker_es) + qt(.975, df = df.residual(mod_worker_es))*sqrt(diag(vcov(mod_worker_es))),
  Year = as.numeric(gsub("border[[:digit:]][[:digit:]]_", "", names(coefficients(mod_worker_es))))
)

event_study_dat_worker <- rbind(setDT(event_study_dat_worker), data.table(c(0), c(0), c(0), c(1998)), use.names = F)

workers_event <- ggplot(data = event_study_dat_worker, aes(x = Year, y = y, ymin = ymin, ymax = ymax, 
                                                           color = factor(ifelse(Year < 2000, 3, ifelse(Year < 2004, 2, 1))),
                                                           shape = factor(ifelse(Year <= 1999, 3, ifelse(Year < 2004, 2, 1)))
                                                           )) + 
  geom_vline(xintercept = c(1999, 2004), linetype = "dashed", color = "black") +
  geom_hline(yintercept = 0, linetype = 2, color = "black") +
  geom_pointrange() +
  annotate("text", x = 1996, y = .3, label = "Pre-Reform", hjust = 0.5) +
  annotate("text", x = 2001.5, y = .3, label = "Transition", hjust = 0.5) +
  annotate("text", x = 2007, y = .3, label = "Free Movement", hjust = 0.5) +
  scale_color_manual(values = cbPalette[c(1,2,8)], name = NULL, labels = c("Free Movement", "Transition", "Pre-Reform"), guide = guide_legend(reverse = T)) +
  scale_shape_manual(values = c(16, 17, 18), 
                     name = NULL, 
                     labels = c("Free Movement", "Transition", "Pre-Reform"), 
                     guide = guide_legend(reverse = T)) + 
  ylab("Number of Workers (Log)") +
  theme_minimal() + 
  scale_x_continuous(breaks = seq(1994, 2010,2)) +
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        legend.margin = margin(4,4,4,4,unit = "pt")) +
  ggtitle("B. Effect of Border Proximity on Employment")


## DiD
workers_did <- ggplot(coef.gg.prep(list(mod_worker)), aes(x = modelcoef, y = -1*y, xmin = ylo95, xmax = yhi95, color = variable, shape = variable, group = variable)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  ggstance::geom_linerangeh(position = ggstance::position_dodgev(.25)) +
  ggstance::geom_pointrangeh(aes(xmin = ylo90, xmax = yhi90), size = 1.25, position = ggstance::position_dodgev(.25)) +
  ylab(NULL) +
  scale_y_continuous(breaks = c(-2, -1), labels = c("Transition\nPeriod", "Free\nMovement"), expand = expand_scale(mult = 0, add = 0.5)) +
  scale_color_manual(name = NULL, guide = "none", values = cbPalette[-3]) +
  scale_shape_discrete(name = NULL, guide = "none") +
  xlab("Effect of Border Proximity on Employment") +
  ggtitle("C. Difference-in-Differences Estimates") +
  theme_bw() + 
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        legend.margin = margin(4,4,4,4,unit = "pt"))


plots <- align_patches(workers_raw, workers_did)
top_row <- plot_grid(plots[[1]], workers_event, align = "h")
pdf(here("figures", "Fig4.pdf"), height = 8.27, width = 11.69)
plot_grid(top_row, plots[[2]], ncol = 1, rel_heights = c(3, 1.5))
dev.off()


# Regression Tables -------------------------------------------------------
Texreg(
  list(
    mod_worker,
    mod_worker_weights,
    mod_worker_es,
    mod_worker_es_weights,
    mod_wage,
    mod_wage_weights,
    mod_wage_es,
    mod_wage_es_weights),
  custom.header = list("No. of workers (log)" = 1:4, "Mean log hourly wage" = 5:8),
  custom.gof.rows = list("Weights" = rep(c("No", "Yes"), 4)),
  custom.coef.map = list(
    "transBorder15" = "0--15 Minutes X Transition",
    "freeBorder15" = "0--15 Minutes X Free",
    "border15_1994" = "0--15 Minutes X 1994",
    "border15_1996" = "0--15 Minutes X 1996",
    "border15_2000" = "0--15 Minutes X 2000",
    "border15_2002" = "0--15 Minutes X 2002",
    "border15_2004" = "0--15 Minutes X 2004",
    "border15_2006" = "0--15 Minutes X 2006",
    "border15_2008" = "0--15 Minutes X 2008",
    "border15_2010" = "0--15 Minutes X 2010"),
  caption = "Effects on Economic Outcomes. Outcomes: Number of Workers (Logged) and Real Hourly Wages (Logged). Sample: Border Region, Travel Minutes $<=$ 30. Baseline Year: 1998. Weights Represent the Number of Workers.",
  label = "tab:econ_outcomes",
  file = here("tables", "econ_outcomes.tex"),
  single.row = T
)




# Immigrants education ----------------------------------------------------
rm(list = ls())
gc()
library(haven)
sess_micro <- read_dta(here("data", "labor_market", "d01_sess_1994_2010.dta"))

DE <- c("AG", "AI", "AR", "BE", "BL", "BS", "FR", "GL", "GR", "JU", "LU", "NW", "OW", "SG", "SH", "SO", "SZ", "TG", "UR", "VS", "ZG", "ZH")
FR <- c("BE", "FR", "GE", "JU", "NE", "VD", "VS")
IT <- "TI"

sess_micro$nat4 %>% table
sess_micro <- sess_micro %>% filter(nat4 %in% c("ir", "icbw")) %>% mutate(languageReg = case_when(arbkto %in% DE ~ "DE",
                                                                       arbkto %in% FR ~ "FR",
                                                                       arbkto %in% IT ~ "IT"))

sess_micro %>% 
  filter(edu2 != "", complete.cases(weight)) %>%
  group_by(languageReg, nat4) %>% 
  summarise(high_ed = weighted.mean(edu2 == "1high", weight = weight),
            low_ed = weighted.mean(edu2 == "2low", weight = weight))

